
clear
capture log close
set more off

clear
use income_b1994.dta //cps data base year 1994 $1994 
keep if year==1994
egen sumhh=total(asecfwt)
gen weight=asecfwt/sumhh
sum hhincome_1994[aweight=weight],d
drop if missing(hhincome_1994)
pctile pct = hhincome_1994 [w=weight], nq(20) genp(percent)
list percent pct in 1/20


*** each quarter in 1994 get mean kwh by division*income weights
forval i=1/4{
clear
use "$dta1\intrvw94\intrvw94\fmli94`i'.dta"
gen elc=elctrccq+elctrcpq
drop if elctrccq==0
drop if fincbtax==0

gen group=1 if fincbtax<5100
replace group=2 if fincbtax>5100 & fincbtax<7656
replace group=3 if fincbtax>7656 & fincbtax<10192
replace group=4 if fincbtax>10192 & fincbtax<12900
replace group=5 if fincbtax>12900 & fincbtax<15612
replace group=6 if fincbtax>15612 & fincbtax<18552
replace group=7 if fincbtax>18552 & fincbtax<21477
replace group=8 if fincbtax>21477 & fincbtax<24604
replace group=9 if fincbtax>24604 & fincbtax<27700
replace group=10 if fincbtax>27700 & fincbtax<31000
replace group=11 if fincbtax>31000 & fincbtax<34700
replace group=12 if fincbtax>34700 & fincbtax<38701
replace group=13 if fincbtax>38701 & fincbtax<43063
replace group=14 if fincbtax>43063 & fincbtax<48200
replace group=15 if fincbtax>48200 & fincbtax<53600
replace group=16 if fincbtax>53600 & fincbtax<60267
replace group=17 if fincbtax>60267 & fincbtax<69090
replace group=18 if fincbtax>69090 & fincbtax<81670
replace group=19 if fincbtax>81670 & fincbtax<102734
replace group=20 if fincbtax>102734


gen year=1994
merge m:1 year state using price.dta
rename residential price
drop if _merge==2


replace price=price/100

gen kwh=elc/price
drop _merge
merge m:1 state using state_division.dta
drop if _merge==2

bysort group division: egen sumhh=total(finlwt21)
bysort group division: gen weight=finlwt21/sumhh
bysort group division: egen meankwh=total(weight*kwh)
rename meankwh meankwh`i'

 collapse meankwh,by(group division)
 drop if group==.
 drop if division==.
save 1994_q`i'.dta,replace
}

** append

clear
use 1994_q1
forval i=2/4 {
merge  1:1  group division using 1994_q`i'
drop _merge
}
gen kwh=meankwh1+meankwh2+meankwh3+meankwh4
  
keep group kwh division
save 1994ce.dta,replace 

** 94 year as base
 clear

. use "bls_prices_2.dta"

* normalize cpi to equal one in base years

foreach num in 1994  {
	gen norm`num' = cpi if year==`num'
	sort year
	replace norm`num' = norm`num'[_n-1] if norm`num'==.
	gsort -year
	replace norm`num' = norm`num'[_n-1] if norm`num'==.
	gen cpi_`num' = cpi/norm`num'
	
}
* save
keep year cpi_*
sort year
save cpi_allyears_94, replace 


clear
use cps_00003
sort year serial pernum
merge m:1 year using cpi_allyears_94
*keep if _merge==3
drop _merge

* calculate real income

gen hhincome_1994 = hhincome/cpi_1994

** keep household head only
keep if relate==101
	
* Census division
gen division = 1 if region==11
replace division = 2 if region==12
replace division = 3 if region==21
replace division = 4 if region==22
replace division = 5 if region==31
replace division = 6 if region==32
replace division = 7 if region==33
replace division = 8 if region==41
replace division = 9 if region==42

save income_b1994.dta,replace


clear
use income_b1994.dta

forvalues i=1994/2020 {
use income_b1994.dta

* create real income groups
gen group=1 if hhincome_1994<5100
replace group=2 if hhincome_1994>5100 & hhincome_1994<7656
replace group=3 if hhincome_1994>7656 & hhincome_1994<10192
replace group=4 if hhincome_1994>10192 & hhincome_1994<12900
replace group=5 if hhincome_1994>12900 & hhincome_1994<15612
replace group=6 if hhincome_1994>15612 & hhincome_1994<18552
replace group=7 if hhincome_1994>18552 & hhincome_1994<21477
replace group=8 if hhincome_1994>21477 & hhincome_1994<24604
replace group=9 if hhincome_1994>24604 & hhincome_1994<27700
replace group=10 if hhincome_1994>27700 & hhincome_1994<31000
replace group=11 if hhincome_1994>31000 & hhincome_1994<34700
replace group=12 if hhincome_1994>34700 & hhincome_1994<38701
replace group=13 if hhincome_1994>38701 & hhincome_1994<43063
replace group=14 if hhincome_1994>43063 & hhincome_1994<48200
replace group=15 if hhincome_1994>48200 & hhincome_1994<53600
replace group=16 if hhincome_1994>53600 & hhincome_1994<60267
replace group=17 if hhincome_1994>60267 & hhincome_1994<69090
replace group=18 if hhincome_1994>69090 & hhincome_1994<81670
replace group=19 if hhincome_1994>81670 & hhincome_1994<102734
replace group=20 if hhincome_1994>102734


keep if year==`i'

collapse (sum) asecfwt, by(group division)
rename asecfwt hh_count

egen sumhh=total(hh_count)
gen weight=hh_count/sumhh
save `i'_groupdivision94,replace
}



*** predictefd kwh

clear
forval i=1994/2020  {
clear
use 1994ce

quietly drop if division==. 
sort group division
quietly destring group,replace

quietly merge 1:1 group division using  `i'_groupdivision94

quietly drop if kwh==.
quietly drop if hh_count==.

*sum kwh // this sum is not estimated/ estimated is by weights, this is just summary
quietly sum kwh[aweight=weight]
quietly return list
dis `i',r(mean)

}


*****====standard errors====*****


*** generate 1994 file by merging 4 quarters of data

forval i=1/4{
clear
use "$dta1\intrvw94\intrvw94\fmli94`i'.dta"
gen elc=elctrccq+elctrcpq
drop if elctrccq==0
drop if fincbtax==0

gen group=1 if fincbtax<5100
replace group=2 if fincbtax>5100 & fincbtax<7656
replace group=3 if fincbtax>7656 & fincbtax<10192
replace group=4 if fincbtax>10192 & fincbtax<12900
replace group=5 if fincbtax>12900 & fincbtax<15612
replace group=6 if fincbtax>15612 & fincbtax<18552
replace group=7 if fincbtax>18552 & fincbtax<21477
replace group=8 if fincbtax>21477 & fincbtax<24604
replace group=9 if fincbtax>24604 & fincbtax<27700
replace group=10 if fincbtax>27700 & fincbtax<31000
replace group=11 if fincbtax>31000 & fincbtax<34700
replace group=12 if fincbtax>34700 & fincbtax<38701
replace group=13 if fincbtax>38701 & fincbtax<43063
replace group=14 if fincbtax>43063 & fincbtax<48200
replace group=15 if fincbtax>48200 & fincbtax<53600
replace group=16 if fincbtax>53600 & fincbtax<60267
replace group=17 if fincbtax>60267 & fincbtax<69090
replace group=18 if fincbtax>69090 & fincbtax<81670
replace group=19 if fincbtax>81670 & fincbtax<102734
replace group=20 if fincbtax>102734


gen year=1994
merge m:1 year state using price.dta
rename residential price
drop if _merge==2


replace price=price/100
gen kwh=elc/price
drop _merge
merge m:1 state using state_division.dta
drop if _merge==2
drop _merge

bysort group division: egen sumhh=total(finlwt21)
bysort group division: gen weight=finlwt21/sumhh
bysort group division: egen meankwh=total(weight*kwh)
rename meankwh meankwh`i'

 drop if group==.
 drop if division==.
save cex1994_q`i'.dta,replace

}

forval i=1/4 {
use cex1994_q`i'.dta,clear
capture gen cuid=substr(newid,1,5)
save,replace
}

use cex1994_q1.dta,clear

capture drop _merge
forval i=2/4 {
 merge 1:1 cuid using cex1994_q`i'.dta 
 keep if _merge==3
 drop _merge
}

save cex1994.dta,replace
*** standard errors

capture drop _merge
forval i=1994/2020 {
use cex1994,clear
drop weight // this is important becasue this weight is from group weighted

quietly merge m:1 group division using `i'_groupdivision94
quietly keep if _merge==3

egen hhgroup=group(group division)

quietly reg kwh i.hhgroup [aweight=weight], nocons
estimates store results1
quietly esttab results1, b(3) se(3) scalars(r2) star( * 0.10 ** 0.05 *** 0.01) 

quietly matrix B=e(V)

collapse weight,by (group division) 
quietly sort group division 
quietly mkmat weight,matrix(A)
*local dim1 (`= rowsof(A)',`=colsof(A)') 
*quietly dis"`dim1'" 
quietly matrix SD=A'*B*A
*quietly matrix list SD
quietly scalar z=SD[1,1]
di `i',sqrt(z)
}



*** counterfactual with income growth

**** generate new cps_division count

clear
use income_b1994.dta

bysort year: egen sumhh=total(asecfwt)
bysort year: gen weight=asecfwt/sumhh
bysort year: egen meanincome=total(weight*hhincome_1994)

collapse meanincome,by(year)

gen grate=meanincome/meanincome[_n-1]
replace grate=1 if missing(grate) 
** this chunk of code is to get grate as of 1990
gen lngrate = ln(grate)
gen lntotal = sum(lngrate)
gen double grateproduct = exp(lntotal)
drop grate
rename grateproduct grate
drop lngrate
drop lntotal
drop meanincome
save grate94.dta,replace

**generate new income
clear
use income_b1994.dta
merge m:1 year using grate94
drop _merge
sort year

gen nn_income=hhincome_1994*grate
order hhincome_1994 nn_income grate

save income_b1994_grate.dta,replace
** 1994 -2020
clear

forvalues num=1994/2020 {
use income_b1994_grate.dta
*** new income groups
	gen ngroup=1 if nn_income<5100
replace ngroup=2 if nn_income>5100 & nn_income<7656
replace ngroup=3 if nn_income>7656 & nn_income<10192
replace ngroup=4 if nn_income>10192 & nn_income<12900
replace ngroup=5 if nn_income>12900 & nn_income<15612
replace ngroup=6 if nn_income>15612 & nn_income<18552
replace ngroup=7 if nn_income>18552 & nn_income<21477
replace ngroup=8 if nn_income>21477 & nn_income<24604
replace ngroup=9 if nn_income>24604 & nn_income<27700
replace ngroup=10 if nn_income>27700 & nn_income<31000
replace ngroup=11 if nn_income>31000 & nn_income<34700
replace ngroup=12 if nn_income>34700 & nn_income<38701
replace ngroup=13 if nn_income>38701 & nn_income<43063
replace ngroup=14 if nn_income>43063 & nn_income<48200
replace ngroup=15 if nn_income>48200 & nn_income<53600
replace ngroup=16 if nn_income>53600 & nn_income<60267
replace ngroup=17 if nn_income>60267 & nn_income<69090
replace ngroup=18 if nn_income>69090 & nn_income<81670
replace ngroup=19 if nn_income>81670 & nn_income<102734
replace ngroup=20 if nn_income>102734

keep if year==`num'

collapse (sum) asecwth, by(ngroup division)
rename  asecwth hh_count

egen sumhh=total(hh_count)
gen weight=hh_count/sumhh
save grate`num'_94,replace
}

*** predictefd kwh

clear
forval i=1994/2020  {
clear
use 1994ce // 
quietly drop if division==. 
quietly rename group ngroup
quietly sort ngroup division

quietly destring ngroup,replace

quietly merge 1:1 ngroup division using  grate`i'_94

quietly drop if kwh==.
quietly drop if hh_count==.

*sum kwh // this sum is not estimated/ estimated is by weights, this is just summary
quietly sum kwh[aweight=weight]
quietly return list
di r(mean)
}



*** standard errors

capture drop _merge
forval i=1994/2020 {
use cex1994,clear
drop weight // this is important becasue this weight is from group weighted
quietly rename group ngroup

quietly merge m:1 ngroup division using grate`i'_94
quietly keep if _merge==3

egen hhgroup=group(ngroup division)

quietly reg kwh i.hhgroup [aweight=weight], nocons
estimates store results1
quietly esttab results1, b(3) se(3) scalars(r2) star( * 0.10 ** 0.05 *** 0.01) 

quietly matrix B=e(V)

collapse weight,by (ngroup division) 
quietly sort ngroup division 
quietly mkmat weight,matrix(A)
*local dim1 (`= rowsof(A)',`=colsof(A)') 
*quietly dis"`dim1'" 
quietly matrix SD=A'*B*A
*quietly matrix list SD
quietly scalar z=SD[1,1]
di `i',sqrt(z)
}



***income change over years
clear
use income_b1994.dta // this file uses $1990

forvalues i=1994/2020 {
use income_b1994.dta

* create real income groups

gen group=1 if hhincome_1994<5100
replace group=2 if hhincome_1994>5100 & hhincome_1994<7656
replace group=3 if hhincome_1994>7656 & hhincome_1994<10192
replace group=4 if hhincome_1994>10192 & hhincome_1994<12900
replace group=5 if hhincome_1994>12900 & hhincome_1994<15612
replace group=6 if hhincome_1994>15612 & hhincome_1994<18552
replace group=7 if hhincome_1994>18552 & hhincome_1994<21477
replace group=8 if hhincome_1994>21477 & hhincome_1994<24604
replace group=9 if hhincome_1994>24604 & hhincome_1994<27700
replace group=10 if hhincome_1994>27700 & hhincome_1994<31000
replace group=11 if hhincome_1994>31000 & hhincome_1994<34700
replace group=12 if hhincome_1994>34700 & hhincome_1994<38701
replace group=13 if hhincome_1994>38701 & hhincome_1994<43063
replace group=14 if hhincome_1994>43063 & hhincome_1994<48200
replace group=15 if hhincome_1994>48200 & hhincome_1994<53600
replace group=16 if hhincome_1994>53600 & hhincome_1994<60267
replace group=17 if hhincome_1994>60267 & hhincome_1994<69090
replace group=18 if hhincome_1994>69090 & hhincome_1994<81670
replace group=19 if hhincome_1994>81670 & hhincome_1994<102734
replace group=20 if hhincome_1994>102734

keep if year==`i'

collapse (sum) asecfwt, by(group)
rename asecfwt hh_count

egen sumhh=total(hh_count)
gen weight=hh_count/sumhh
save `i'_income94,replace
}

* regroup 
clear
forval i=1994/2020 {
use `i'_income94
egen sumweight=sum(weight)
recode group (1/5=1) (6/10=2) (10/15=3) (16/20=4), generate(inc_plot)
collapse (sum) weight,by (inc_plot)
rename weight weight_`i' 
save inc94_`i'.dta,replace
}

** append

clear
use inc94_1994
forval i=1994/2020 {
capture drop _merge
merge 1:1 inc_plot using inc94_`i'
}
drop if inc_plot==.


*** division change over years 


clear
use income_b1994.dta

forvalues i=1994/2020 {
use income_b1994.dta

keep if year==`i'

collapse (sum) asecfwt, by(division)
rename asecfwt hh_count

egen sumhh=total(hh_count)
gen weight=hh_count/sumhh
save `i'_division94,replace
}

clear
use 1994_division94
rename weight weight_1994

forval i=1995/2020 {
capture drop _merge

merge 1:1 division using `i'_division94
rename weight weight_`i'
}

drop hh_count sumhh _merge
save divsion_overyears.dta,replace

** regroup into 4 regions
clear
forval i=1994/2020 {
use `i'_division94
egen sumweight=sum(weight)
recode division (1/2=1) (3/4=2) (5/7=3) (8/9=4), generate(region)
collapse (sum) weight,by (region)
rename weight weight_`i' 
save region_`i'.dta,replace
}

** append

clear
use region_1994
forval i=1994/2020 {
capture drop _merge
merge 1:1 region using region_`i'
}


***** by income groups 1994

clear
use income_b1994.dta
keep if year==1994
egen sumhh=total(asecfwt)
gen weight=asecfwt/sumhh
sum hhincome_1994[aweight=weight],d
drop if missing(hhincome_1994)
pctile pct = hhincome_1994 [w=weight], nq(20) genp(percent)
list percent pct in 1/20

forval i=1/4{
clear
use "$dta1\intrvw94\intrvw94\fmli94`i'.dta"
gen elc=elctrccq+elctrcpq
drop if elctrccq==0
drop if fincbtax==0

gen group=1 if fincbtax<5100
replace group=2 if fincbtax>5100 & fincbtax<7656
replace group=3 if fincbtax>7656 & fincbtax<10192
replace group=4 if fincbtax>10192 & fincbtax<12900
replace group=5 if fincbtax>12900 & fincbtax<15612
replace group=6 if fincbtax>15612 & fincbtax<18552
replace group=7 if fincbtax>18552 & fincbtax<21477
replace group=8 if fincbtax>21477 & fincbtax<24604
replace group=9 if fincbtax>24604 & fincbtax<27700
replace group=10 if fincbtax>27700 & fincbtax<31000
replace group=11 if fincbtax>31000 & fincbtax<34700
replace group=12 if fincbtax>34700 & fincbtax<38701
replace group=13 if fincbtax>38701 & fincbtax<43063
replace group=14 if fincbtax>43063 & fincbtax<48200
replace group=15 if fincbtax>48200 & fincbtax<53600
replace group=16 if fincbtax>53600 & fincbtax<60267
replace group=17 if fincbtax>60267 & fincbtax<69090
replace group=18 if fincbtax>69090 & fincbtax<81670
replace group=19 if fincbtax>81670 & fincbtax<102734
replace group=20 if fincbtax>102734


gen year=1994
merge m:1 year state using price.dta
rename residential price
drop if _merge==2


replace price=price/100
gen kwh=elc/price

bysort group: egen sumhh=total(finlwt21)
bysort group: gen weight=finlwt21/sumhh
bysort group: egen meankwh=total(weight*kwh)
rename meankwh meankwh`i'

 collapse meankwh,by(group)
 drop if group==.
save 1994byincome_q`i'.dta,replace
}

** append

clear
use 1994byincome_q1
forval i=2/4 {
merge  1:1  group using 1994byincome_q`i'
drop _merge
}
gen kwh=meankwh1+meankwh2+meankwh3+meankwh4
  
keep group kwh
save 1994cebyincome.dta,replace
